# Packages and dependence
packageCheckClassic <- function(x){
#
for( i in x ){
if( ! require( i , character.only = TRUE ) ){
install.packages( i , dependencies = TRUE )
require( i , character.only = TRUE )
}
}
}
packageCheckClassic(c('gridExtra','DESeq2','adegenet','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Loading required package: gridExtra
## Loading required package: DESeq2
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: adegenet
## Loading required package: ade4
##
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
##
## score
## The following object is masked from 'package:BiocGenerics':
##
## score
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## Loading required package: devtools
## Loading required package: usethis
## Loading required package: BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.18'
## is available with R version '4.3'; see https://bioconductor.org/install
##
## Attaching package: 'BiocManager'
## The following object is masked from 'package:devtools':
##
## install
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: markdown
## Loading required package: pheatmap
## Loading required package: RColorBrewer
## Loading required package: genefilter
##
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
##
## rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
##
## rowSds, rowVars
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: vegan
## Loading required package: permute
##
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
##
## check
## Loading required package: lattice
## This is vegan 2.6-4
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.1.3
##
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
##
## plotMA
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (25fd3b55) has not changed since last install.
## Use `force = TRUE` to force installation
library("adegenet")
library('ggvenn')
## Loading required package: grid
library('tximport')
library('apeglm')
library('ashr')
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library('pairwiseAdonis')
## Loading required package: cluster
library('BiocManager')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Functions
trimFunction <- function(resLFC,p_adj_cut,lfc_cut) {
resOrdered<-resLFC[order(resLFC$padj),]
y<-na.omit(resOrdered)
resTrim<-y[y$padj < p_adj_cut,]
resTrim <- resTrim[ which( resTrim$log2FoldChange > lfc_cut | resTrim$log2FoldChange < -lfc_cut), ]
resTrim <- as.data.frame(resTrim)
resTrim$genes <- rownames(resTrim)
return(resTrim)
}
applyAnnot <- function(inputDf,inputDfAnnot) {
gene_and_annot_col <- character(0)
inputDf_annot_genes <- inputDfAnnot$genes
inputDf_annot_pfam <- inputDfAnnot$pfam_annotation
for (i in 1:length(inputDf_annot_genes)) {
gene_and_annot_col <- c(gene_and_annot_col,
paste(inputDf_annot_genes[i], " - ", inputDf_annot_pfam[i]))
}
inputDf_genes <- inputDf$genes
new_gene_annot_col <- character(0)
for (i in inputDf_genes) {
gene <- i
for (j in gene_and_annot_col) {
j_split <- strsplit(j, " ", fixed = TRUE)[[1]]
if (i %in% j_split[1]) {
gene <- j
}
}
new_gene_annot_col <- c(new_gene_annot_col, gene)
}
inputDf$genes <- new_gene_annot_col
return(inputDf)
}
heatmapFunction <- function(newColNames,commonGenes,commonGenesAll,vsd,nameCode) {
vsdCommonGm <- vsd[commonGenes$genes,]
vsdCommonGm@colData@rownames = newColNames
annot_genes = commonGenesAll$genes
genes = names(vsdCommonGm)
new_names = list()
for (i in annot_genes) {
gene = i
for (j in genes) {
if (i == j) {
gene = j
}
}
new_names <- c(new_names,gene)
}
names(vsdCommonGm) <- new_names
pheatmap(assay(vsdCommonGm),main=nameCode,scale="row", cluster_rows=T, show_rownames=T,
cluster_cols=FALSE,cellwidth = 20,fontsize_row = 4)
return(vsdCommonGm)
}
similarityFunction <- function(dataset1,dataset2) {
c = 0
for (i in rownames(dataset1)) {
if (i %in% rownames(dataset2)) {
c = c + 1
}
}
sim = (c / as.integer(length(rownames(dataset2)))) * 100
return(sim)
}
# Nov2016 dataset
# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samples<-read.table('tximport_design_preliminarySamples.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/nov2016'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/preliminarySamples/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/nov2016", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts, colData = samples,design = ~site)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "site_pv_vs_gm"
## [3,] "site_sa_vs_gm"
# General
vsdNov2016 = vst(dds,blind=T)
pcaData = plotPCA(vsdNov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Common gm
res_pv_gm_lfc2_intra<-results(dds, contrast=c("site","pv","gm"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2_intra<-trimFunction(res_pv_gm_lfc2_intra,0.05,1)
res_sa_gm_lfc2_intra<-results(dds, contrast=c("site","sa","gm"), lfcThreshold = 1,alpha = 0.05)
res_sa_gm_lfc2_intra<-trimFunction(res_sa_gm_lfc2_intra,0.05,1)
common_gm_lfc2_intra <- merge(res_pv_gm_lfc2_intra,res_sa_gm_lfc2_intra,by="genes")
common_gm_annot_lfc2_intra = merge(common_gm_lfc2_intra,A_pfam,by="genes")
common_gm_all_lfc2_intra <- applyAnnot(common_gm_lfc2_intra,common_gm_annot_lfc2_intra)
newColNames = c("gm","gm","gm","gm","pv","pv","pv","sp","sp","sp")
vsd_common_gm_lfc2_intra_nov2016 = heatmapFunction(newColNames,common_gm_lfc2_intra,common_gm_all_lfc2_intra,vsdNov2016,"Nov2016 common GM genes - Intra method")

pcaData = plotPCA(vsd_common_gm_lfc2_intra_nov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset - Common GM DEGs") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Common sa
res_pv_sa_lfc2_intra<-results(dds, contrast=c("site","pv","sa"), lfcThreshold = 1,alpha = 0.05)
res_pv_sa_lfc2_intra<-trimFunction(res_pv_sa_lfc2_intra,0.05,1)
res_gm_sa_lfc2_intra<-results(dds, contrast=c("site","gm","sa"), lfcThreshold = 1,alpha = 0.05)
res_gm_sa_lfc2_intra<-trimFunction(res_gm_sa_lfc2_intra,0.05,1)
common_sa_lfc2_intra <- merge(res_pv_sa_lfc2_intra,res_gm_sa_lfc2_intra,by="genes")
common_sa_annot_lfc2_intra = merge(common_sa_lfc2_intra,A_pfam,by="genes")
common_sa_all_lfc2_intra <- applyAnnot(common_sa_lfc2_intra,common_sa_annot_lfc2_intra)
newColNames = c("gm","gm","gm","gm","pv","pv","pv","sp","sp","sp")
vsd_common_sa_lfc2_intra_nov2016 = heatmapFunction(newColNames,common_sa_lfc2_intra,common_sa_all_lfc2_intra,vsdNov2016,"Nov2016 common SA genes - Intra method")

pcaData = plotPCA(vsd_common_sa_lfc2_intra_nov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset - Common GM DEGs") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Common pv
res_sa_pv_lfc2_intra<-results(dds, contrast=c("site","sa","pv"), lfcThreshold = 1,alpha = 0.05)
res_sa_pv_lfc2_intra<-trimFunction(res_pv_gm_lfc2_intra,0.05,1)
res_gm_pv_lfc2_intra<-results(dds, contrast=c("site","gm","pv"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2_intra<-trimFunction(res_gm_pv_lfc2_intra,0.05,1)
common_pv_lfc2_intra <- merge(res_sa_pv_lfc2_intra,res_gm_pv_lfc2_intra,by="genes")
common_pv_annot_lfc2_intra = merge(common_pv_lfc2_intra,A_pfam,by="genes")
common_pv_all_lfc2_intra <- applyAnnot(common_pv_lfc2_intra,common_pv_annot_lfc2_intra)
newColNames = c("gm","gm","gm","gm","pv","pv","pv","sa","sa","sa")
vsd_common_pv_lfc2_intra_nov2016 = heatmapFunction(newColNames,common_pv_lfc2_intra,common_pv_all_lfc2_intra,vsdNov2016,"Nov2016 common PV genes - Intra method")

pcaData = plotPCA(vsd_common_pv_lfc2_intra_nov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset - Common GM DEGs") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inferences statistics
count_tab_assay <- assay(vsdNov2016)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samples,dist_tab_assay ~ site, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ site, data = samples, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## site 2 15225 0.35131 1.8955 0.002 **
## Residual 7 28114 0.64869
## Total 9 43339 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samples$site)
## Set of permutations < 'minperm'. Generating entire set.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 gm vs pv 1 6659.756 1.607662 0.2433027 0.032 0.096
## 2 gm vs sa 1 8500.558 2.183983 0.3040073 0.020 0.060
## 3 pv vs sa 1 7688.511 1.915740 0.3238377 0.100 0.300
mod <- betadisper(dist_tab_assay,samples$site)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv-gm -0.9652221 -19.16340 17.23295 0.9866641
## sa-gm -5.0820012 -23.28018 13.11617 0.7020122
## sa-pv -4.1167790 -23.57145 15.33789 0.8125071
# May 2018 dataset
# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesBck<-read.table('tximport_design_trueTransplant_bck.txt',header=T)
samplesTroBck<-read.table('tximport_design_trueTransplant_tro_bck.txt',header=T)
samplesTroBck2<-read.table('tximport_design_trueTransplant_tro_bck_2.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_bck <- raw_counts[,grep("bck", colnames(raw_counts))]
raw_counts_bck_tro <- raw_counts[,grep("bck|tro", colnames(raw_counts))]
# DDS object
ddsBck<-DESeqDataSetFromMatrix(countData = raw_counts_bck, colData = samplesBck,design = ~site)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckTro<-DESeqDataSetFromMatrix(countData = raw_counts_bck_tro, colData = samplesTroBck,design = ~site + experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckTro2<-DESeqDataSetFromMatrix(countData = raw_counts_bck_tro, colData = samplesTroBck2,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(ddsBck)) >= 10
ddsBck <- ddsBck[keep,]
keep <- rowSums(counts(ddsBckTro)) >= 10
ddsBckTro <- ddsBckTro[keep,]
keep <- rowSums(counts(ddsBckTro2)) >= 10
ddsBckTro2 <- ddsBckTro2[keep,]
# Differential expression analysis
ddsBck<-DESeq(ddsBck)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBck))
## [,1]
## [1,] "Intercept"
## [2,] "site_pv_vs_gm"
## [3,] "site_sp_vs_gm"
ddsBckTro<-DESeq(ddsBckTro)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBckTro))
## [,1]
## [1,] "Intercept"
## [2,] "site_pv_vs_gm"
## [3,] "site_sp_vs_gm"
## [4,] "experiment_tro_vs_bck"
ddsBckTro2<-DESeq(ddsBckTro2)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBckTro2))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_gm_gm_tro_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_pv_pv_tro_vs_gm_gm_bck"
## [5,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
## [6,] "originSite_finalSite_experiment_sp_sp_tro_vs_gm_gm_bck"
# General
vsdMay2018Bck = vst(ddsBck,blind=T)
vsdMay2018BckTro = vst(ddsBckTro,blind=T)
vsdMay2018BckTro2 = vst(ddsBckTro2,blind=T)
pcaData = plotPCA(vsdMay2018Bck, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - Background") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

pcaData = plotPCA(vsdMay2018BckTro, intgroup=c("site","experiment"),returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site,shape = experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
scale_shape_manual(values = c("circle", "triangle")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - Background & transplant origin") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Common gm
res_pv_gm_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2<-trimFunction(res_pv_gm_lfc2,0.05,1)
res_sp_gm_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_gm_lfc2<-trimFunction(res_sp_gm_lfc2,0.05,1)
common_gm_lfc2 <- merge(res_pv_gm_lfc2,res_sp_gm_lfc2,by="genes")
common_gm_annot_lfc2 = merge(common_gm_lfc2,A_pfam,by="genes")
common_gm_all_lfc2 <- applyAnnot(common_gm_lfc2,common_gm_annot_lfc2)
newColNames = c("gm_gm_bck","gm_gm_bck","gm_gm_bck","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","pv_pv_bck","pv_pv_bck","pv_pv_bck","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","sp_sp_bck","sp_sp_bck","sp_sp_bck","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro")
vsd_common_gm_may2018 = heatmapFunction(newColNames,common_gm_lfc2,common_gm_all_lfc2,vsdMay2018BckTro2,"May2018 common GM genes")

# Common pv
res_gm_pv_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2<-trimFunction(res_gm_pv_lfc2,0.05,1)
res_sp_pv_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_pv_lfc2<-trimFunction(res_sp_pv_lfc2,0.05,1)
common_pv_lfc2 <- merge(res_gm_pv_lfc2,res_sp_pv_lfc2,by="genes")
common_pv_annot_lfc2 = merge(common_pv_lfc2,A_pfam,by="genes")
common_pv_all_lfc2 <- applyAnnot(common_pv_lfc2,common_pv_annot_lfc2)
newColNames = c("gm_gm_bck","gm_gm_bck","gm_gm_bck","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","pv_pv_bck","pv_pv_bck","pv_pv_bck","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","sp_sp_bck","sp_sp_bck","sp_sp_bck","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro")
vsd_common_pv_may2018 = heatmapFunction(newColNames,common_pv_lfc2,common_pv_all_lfc2,vsdMay2018BckTro2,"May2018 common PV genes")

# Common sp
res_gm_sp_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_sp_lfc2<-trimFunction(res_gm_sp_lfc2,0.05,1)
res_pv_sp_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_sp_lfc2<-trimFunction(res_pv_sp_lfc2,0.05,1)
common_sp_lfc2 <- merge(res_gm_sp_lfc2,res_pv_sp_lfc2,by="genes")
common_sp_annot_lfc2 = merge(common_sp_lfc2,A_pfam,by="genes")
common_sp_all_lfc2 <- applyAnnot(common_sp_lfc2,common_sp_annot_lfc2)
newColNames = c("gm_gm_bck","gm_gm_bck","gm_gm_bck","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","pv_pv_bck","pv_pv_bck","pv_pv_bck","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","sp_sp_bck","sp_sp_bck","sp_sp_bck","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro")
vsd_common_sp_may2018 = heatmapFunction(newColNames,common_sp_lfc2,common_sp_all_lfc2,vsdMay2018BckTro2,"May2018 common SP genes")

# Inferences statistics
count_tab_assay <- assay(vsdMay2018Bck)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesBck,dist_tab_assay ~ site, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ site, data = samplesBck, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## site 2 16020 0.37447 1.7959 0.003 **
## Residual 6 26761 0.62553
## Total 8 42782 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$site)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 gm vs pv 1 8099.765 1.925246 0.3249226 0.1 0.3
## 2 gm vs sp 1 7586.838 1.760155 0.3055742 0.1 0.3
## 3 pv vs sp 1 8344.058 1.715727 0.3001765 0.1 0.3
mod <- betadisper(dist_tab_assay,samplesBck$site)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv-gm 7.00318 -1.8578996 15.86426 0.1123292
## sp-gm 8.24530 -0.6157798 17.10638 0.0651578
## sp-pv 1.24212 -7.6189601 10.10320 0.9046166
count_tab_assay <- assay(vsdMay2018BckTro)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesTroBck,dist_tab_assay ~ site + experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ site + experiment, data = samplesTroBck, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## site 2 39247 0.20226 2.7914 0.001 ***
## experiment 1 7169 0.03695 1.0198 0.365
## Residual 21 147630 0.76080
## Total 24 194047 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesTroBck2$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 gm_gm_bck vs gm_gm_tro 1 5932.909 1.0428908 0.1480771 0.388 1.000
## 2 gm_gm_bck vs pv_pv_bck 1 10402.253 1.8742356 0.3190603 0.100 1.000
## 3 gm_gm_bck vs pv_pv_tro 1 12721.327 1.8858460 0.2122303 0.025 0.375
## 4 gm_gm_bck vs sp_sp_bck 1 9753.032 1.7230167 0.3010679 0.100 1.000
## 5 gm_gm_bck vs sp_sp_tro 1 14682.064 1.8902571 0.2395685 0.022 0.330
## 6 gm_gm_tro vs pv_pv_bck 1 13033.488 2.1235643 0.2614080 0.021 0.315
## 7 gm_gm_tro vs pv_pv_tro 1 17898.868 2.6047513 0.2244556 0.004 0.060
## 8 gm_gm_tro vs sp_sp_bck 1 12833.151 2.0661667 0.2561522 0.020 0.300
## 9 gm_gm_tro vs sp_sp_tro 1 18502.635 2.4175319 0.2320638 0.013 0.195
## 10 pv_pv_bck vs pv_pv_tro 1 6707.946 0.9407742 0.1184739 0.542 1.000
## 11 pv_pv_bck vs sp_sp_bck 1 10716.336 1.6920331 0.2972634 0.100 1.000
## 12 pv_pv_bck vs sp_sp_tro 1 11559.789 1.4070061 0.1899561 0.107 1.000
## 13 pv_pv_tro vs sp_sp_bck 1 10597.093 1.4731946 0.1738653 0.023 0.345
## 14 pv_pv_tro vs sp_sp_tro 1 12536.034 1.5181994 0.1443402 0.035 0.525
## 15 sp_sp_bck vs sp_sp_tro 1 8377.115 1.0105796 0.1441507 0.431 1.000
## sig
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
mod <- betadisper(dist_tab_assay,samplesTroBck2$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## gm_gm_tro-gm_gm_bck 12.644076 -12.863302 38.15145 0.6286622
## pv_pv_bck-gm_gm_bck 7.423454 -21.094661 35.94157 0.9596342
## pv_pv_tro-gm_gm_bck 21.751592 -2.945821 46.44900 0.1041111
## sp_sp_bck-gm_gm_bck 8.592794 -19.925321 37.11091 0.9273395
## sp_sp_tro-gm_gm_bck 27.534088 2.026710 53.04147 0.0299137
## pv_pv_bck-gm_gm_tro -5.220622 -30.728000 20.28676 0.9856643
## pv_pv_tro-gm_gm_tro 9.107516 -12.042085 30.25712 0.7487049
## sp_sp_bck-gm_gm_tro -4.051282 -29.558660 21.45610 0.9954921
## sp_sp_tro-gm_gm_tro 14.890012 -7.200025 36.98005 0.3142160
## pv_pv_tro-pv_pv_bck 14.328138 -10.369275 39.02555 0.4695593
## sp_sp_bck-pv_pv_bck 1.169340 -27.348775 29.68746 0.9999939
## sp_sp_tro-pv_pv_bck 20.110634 -5.396744 45.61801 0.1759622
## sp_sp_bck-pv_pv_tro -13.158798 -37.856210 11.53861 0.5578661
## sp_sp_tro-pv_pv_tro 5.782496 -15.367104 26.93210 0.9506257
## sp_sp_tro-sp_sp_bck 18.941294 -6.566084 44.44867 0.2243494
count_tab_assay <- assay(vsdMay2018BckTro2)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesTroBck2,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesTroBck2, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 5 60265 0.31057 1.7118 0.001 ***
## Residual 19 133782 0.68943
## Total 24 194047 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesTroBck2$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 gm_gm_bck vs gm_gm_tro 1 5932.909 1.0428908 0.1480771 0.343 1.000
## 2 gm_gm_bck vs pv_pv_bck 1 10402.253 1.8742356 0.3190603 0.100 1.000
## 3 gm_gm_bck vs pv_pv_tro 1 12721.327 1.8858460 0.2122303 0.029 0.435
## 4 gm_gm_bck vs sp_sp_bck 1 9753.032 1.7230167 0.3010679 0.100 1.000
## 5 gm_gm_bck vs sp_sp_tro 1 14682.064 1.8902571 0.2395685 0.020 0.300
## 6 gm_gm_tro vs pv_pv_bck 1 13033.488 2.1235643 0.2614080 0.021 0.315
## 7 gm_gm_tro vs pv_pv_tro 1 17898.868 2.6047513 0.2244556 0.004 0.060
## 8 gm_gm_tro vs sp_sp_bck 1 12833.151 2.0661667 0.2561522 0.018 0.270
## 9 gm_gm_tro vs sp_sp_tro 1 18502.635 2.4175319 0.2320638 0.017 0.255
## 10 pv_pv_bck vs pv_pv_tro 1 6707.946 0.9407742 0.1184739 0.523 1.000
## 11 pv_pv_bck vs sp_sp_bck 1 10716.336 1.6920331 0.2972634 0.100 1.000
## 12 pv_pv_bck vs sp_sp_tro 1 11559.789 1.4070061 0.1899561 0.080 1.000
## 13 pv_pv_tro vs sp_sp_bck 1 10597.093 1.4731946 0.1738653 0.030 0.450
## 14 pv_pv_tro vs sp_sp_tro 1 12536.034 1.5181994 0.1443402 0.031 0.465
## 15 sp_sp_bck vs sp_sp_tro 1 8377.115 1.0105796 0.1441507 0.420 1.000
## sig
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
mod <- betadisper(dist_tab_assay,samplesTroBck2$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## gm_gm_tro-gm_gm_bck 12.644076 -12.863302 38.15145 0.6286622
## pv_pv_bck-gm_gm_bck 7.423454 -21.094661 35.94157 0.9596342
## pv_pv_tro-gm_gm_bck 21.751592 -2.945821 46.44900 0.1041111
## sp_sp_bck-gm_gm_bck 8.592794 -19.925321 37.11091 0.9273395
## sp_sp_tro-gm_gm_bck 27.534088 2.026710 53.04147 0.0299137
## pv_pv_bck-gm_gm_tro -5.220622 -30.728000 20.28676 0.9856643
## pv_pv_tro-gm_gm_tro 9.107516 -12.042085 30.25712 0.7487049
## sp_sp_bck-gm_gm_tro -4.051282 -29.558660 21.45610 0.9954921
## sp_sp_tro-gm_gm_tro 14.890012 -7.200025 36.98005 0.3142160
## pv_pv_tro-pv_pv_bck 14.328138 -10.369275 39.02555 0.4695593
## sp_sp_bck-pv_pv_bck 1.169340 -27.348775 29.68746 0.9999939
## sp_sp_tro-pv_pv_bck 20.110634 -5.396744 45.61801 0.1759622
## sp_sp_bck-pv_pv_tro -13.158798 -37.856210 11.53861 0.5578661
## sp_sp_tro-pv_pv_tro 5.782496 -15.367104 26.93210 0.9506257
## sp_sp_tro-sp_sp_bck 18.941294 -6.566084 44.44867 0.2243494
# September 2018 dataset
# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesBck<-read.table('tximport_design_gardenShort_bck.txt',header=T)
samplesGsBck<-read.table('tximport_design_gardenShort_gs_bck.txt',header=T)
samplesGsBck2<-read.table('tximport_design_gardenShort_gs_bck_2.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/gardenShort/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_bck <- raw_counts[,grep("bck", colnames(raw_counts))]
raw_counts_bck_gs <- raw_counts[,grep("bck|gm_gm_gas|pv_pv_gas|sp_sp_gas", colnames(raw_counts))]
# DDS object
ddsBck<-DESeqDataSetFromMatrix(countData = raw_counts_bck, colData = samplesBck,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckGs<-DESeqDataSetFromMatrix(countData = raw_counts_bck_gs, colData = samplesGsBck,design = ~site + experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckGs2<-DESeqDataSetFromMatrix(countData = raw_counts_bck_gs, colData = samplesGsBck2,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(ddsBck)) >= 10
ddsBck <- ddsBck[keep,]
keep <- rowSums(counts(ddsBckGs)) >= 10
ddsBckGs <- ddsBckGs[keep,]
keep <- rowSums(counts(ddsBckGs2)) >= 10
ddsBckGs2 <- ddsBckGs2[keep,]
# Differential expression analysis
ddsBck<-DESeq(ddsBck)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBck))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
ddsBckGs<-DESeq(ddsBckGs)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 150 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(ddsBckGs))
## [,1]
## [1,] "Intercept"
## [2,] "site_pv_vs_gm"
## [3,] "site_sp_vs_gm"
## [4,] "experiment_gas_vs_bck"
ddsBckGs2<-DESeq(ddsBckGs2)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 73 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(ddsBckGs2))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_gm_gm_gas_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_pv_pv_gas_vs_gm_gm_bck"
## [5,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
## [6,] "originSite_finalSite_experiment_sp_sp_gas_vs_gm_gm_bck"
# General
vsdSept2018Bck = vst(ddsBck,blind=T)
vsdSept2018BckGs = vst(ddsBckGs,blind=T)
vsdSept2018BckGs2 = vst(ddsBckGs2,blind=T)
pcaData = plotPCA(vsdSept2018Bck, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - Background") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

pcaData = plotPCA(vsdSept2018BckGs, intgroup=c("site","experiment"),returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site,shape = experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
scale_shape_manual(values = c("circle", "triangle")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - Background & garden short same site") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Common gm
res_pv_gm_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2<-trimFunction(res_pv_gm_lfc2,0.05,1)
res_sp_gm_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_gm_lfc2<-trimFunction(res_sp_gm_lfc2,0.05,1)
common_gm_lfc2 <- merge(res_pv_gm_lfc2,res_sp_gm_lfc2,by="genes")
common_gm_annot_lfc2 = merge(common_gm_lfc2,A_pfam,by="genes")
common_gm_all_lfc2 <- applyAnnot(common_gm_lfc2,common_gm_annot_lfc2)
newColNames = samplesGsBck2$originSite_finalSite_experiment
vsd_common_gm_sept2018 = heatmapFunction(newColNames,common_gm_lfc2,common_gm_all_lfc2,vsdSept2018BckGs2,"Sept2018 common GM genes")

# Common pv
res_gm_pv_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2<-trimFunction(res_gm_pv_lfc2,0.05,1)
res_sp_pv_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_pv_lfc2<-trimFunction(res_sp_pv_lfc2,0.05,1)
common_pv_lfc2 <- merge(res_gm_pv_lfc2,res_sp_pv_lfc2,by="genes")
common_pv_annot_lfc2 = merge(common_pv_lfc2,A_pfam,by="genes")
common_pv_all_lfc2 <- applyAnnot(common_pv_lfc2,common_pv_annot_lfc2)
newColNames = samplesGsBck2$originSite_finalSite_experiment
vsd_common_pv_sept2018 = heatmapFunction(newColNames,common_pv_lfc2,common_pv_all_lfc2,vsdSept2018BckGs2,"Sept2018 common PV genes")

# Common sp
res_gm_sp_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_sp_lfc2<-trimFunction(res_gm_sp_lfc2,0.05,1)
res_pv_sp_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_sp_lfc2<-trimFunction(res_pv_sp_lfc2,0.05,1)
common_sp_lfc2 <- merge(res_gm_sp_lfc2,res_pv_sp_lfc2,by="genes")
common_sp_annot_lfc2 = merge(common_sp_lfc2,A_pfam,by="genes")
common_sp_all_lfc2 <- applyAnnot(common_sp_lfc2,common_sp_annot_lfc2)
newColNames = samplesGsBck2$originSite_finalSite_experiment
vsd_common_sp_sept2018 = heatmapFunction(newColNames,common_sp_lfc2,common_sp_all_lfc2,vsdSept2018BckGs2,"Sept2018 common SP genes")

# Inferences statistics
count_tab_assay <- assay(vsdSept2018Bck)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesBck,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesBck, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 21353 0.33483 1.5101 0.005 **
## Residual 6 42420 0.66517
## Total 8 63773 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 gm_gm_bck vs pv_pv_bck 1 10427.74 1.297030 0.2448599 0.1 0.3
## 2 gm_gm_bck vs sp_sp_bck 1 11439.66 1.472259 0.2690405 0.1 0.3
## 3 pv_pv_bck vs sp_sp_bck 1 10162.06 1.881833 0.3199399 0.1 0.3
mod <- betadisper(dist_tab_assay,samplesBck$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv_pv_bck-gm_gm_bck -20.366677 -67.75532 27.02196 0.4363563
## sp_sp_bck-gm_gm_bck -22.954352 -70.34299 24.43429 0.3611737
## sp_sp_bck-pv_pv_bck -2.587675 -49.97631 44.80096 0.9846831
count_tab_assay <- assay(vsdSept2018BckGs2)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGsBck2,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGsBck2, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 5 67047 0.30745 1.9534 0.001 ***
## Residual 22 151025 0.69255
## Total 27 218072 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGsBck2$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 gm_gm_bck vs gm_gm_gas 1 9944.572 1.336390 0.1431378 0.055 0.825
## 2 gm_gm_bck vs pv_pv_bck 1 10485.788 1.294760 0.2445361 0.100 1.000
## 3 gm_gm_bck vs pv_pv_gas 1 18669.473 2.304333 0.2476624 0.015 0.225
## 4 gm_gm_bck vs sp_sp_bck 1 11507.405 1.468143 0.2684902 0.100 1.000
## 5 gm_gm_bck vs sp_sp_gas 1 14734.198 1.885532 0.2122025 0.018 0.270
## 6 gm_gm_gas vs pv_pv_bck 1 9948.023 1.588755 0.1656894 0.015 0.225
## 7 gm_gm_gas vs pv_pv_gas 1 16696.124 2.468151 0.1832583 0.001 0.015
## 8 gm_gm_gas vs sp_sp_bck 1 14704.232 2.398249 0.2306397 0.007 0.105
## 9 gm_gm_gas vs sp_sp_gas 1 11894.153 1.807172 0.1411062 0.009 0.135
## 10 pv_pv_bck vs pv_pv_gas 1 9372.151 1.387748 0.1654494 0.052 0.780
## 11 pv_pv_bck vs sp_sp_bck 1 10210.338 1.863755 0.3178432 0.100 1.000
## 12 pv_pv_bck vs sp_sp_gas 1 8730.180 1.350178 0.1616945 0.074 1.000
## 13 pv_pv_gas vs sp_sp_bck 1 24817.344 3.757582 0.3492962 0.017 0.255
## 14 pv_pv_gas vs sp_sp_gas 1 12937.022 1.859292 0.1567793 0.010 0.150
## 15 sp_sp_bck vs sp_sp_gas 1 13810.309 2.186195 0.2379870 0.033 0.495
## sig
## 1
## 2
## 3
## 4
## 5
## 6
## 7 .
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
mod <- betadisper(dist_tab_assay,samplesGsBck2$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## gm_gm_gas-gm_gm_bck -7.5289472 -31.498202 16.440307 0.9199561
## pv_pv_bck-gm_gm_bck -20.2226896 -48.583494 8.138115 0.2680148
## pv_pv_gas-gm_gm_bck -4.6719591 -29.233136 19.889218 0.9904878
## sp_sp_bck-gm_gm_bck -22.6996779 -51.060482 5.661126 0.1690619
## sp_sp_gas-gm_gm_bck -6.8224507 -31.383628 17.738726 0.9508675
## pv_pv_bck-gm_gm_gas -12.6937424 -36.662997 11.275512 0.5765839
## pv_pv_gas-gm_gm_gas 2.8569881 -16.467643 22.181619 0.9970448
## sp_sp_bck-gm_gm_gas -15.1707307 -39.139985 8.798524 0.3887079
## sp_sp_gas-gm_gm_gas 0.7064964 -18.618134 20.031127 0.9999969
## pv_pv_gas-pv_pv_bck 15.5507305 -9.010446 40.111907 0.3883433
## sp_sp_bck-pv_pv_bck -2.4769883 -30.837793 25.883816 0.9997668
## sp_sp_gas-pv_pv_bck 13.4002388 -11.160938 37.961416 0.5461570
## sp_sp_bck-pv_pv_gas -18.0277188 -42.588896 6.533458 0.2411619
## sp_sp_gas-pv_pv_gas -2.1504917 -22.204609 17.903625 0.9993656
## sp_sp_gas-sp_sp_bck 15.8772271 -8.683950 40.438404 0.3664698
count_tab_assay <- assay(vsdSept2018BckGs)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGsBck,dist_tab_assay ~ site + experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ site + experiment, data = samplesGsBck, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## site 2 33920 0.15554 2.4517 0.001 ***
## experiment 1 18126 0.08312 2.6203 0.001 ***
## Residual 24 166026 0.76133
## Total 27 218072 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGsBck$site)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 gm vs pv 1 19170.90 2.584045 0.1319464 0.001 0.003 *
## 2 gm vs sp 1 14511.28 1.934758 0.1021803 0.004 0.012 .
## 3 pv vs sp 1 17210.66 2.401285 0.1304955 0.009 0.027 .
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGsBck$experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 bck vs gas 1 17845.25 2.317261 0.08183209 0.003 0.003 *
mod <- betadisper(dist_tab_assay,samplesGsBck$site)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv-gm -3.510399 -18.58116 11.56036 0.8318672
## sp-gm -2.479455 -17.55021 12.59130 0.9119145
## sp-pv 1.030944 -14.43133 16.49322 0.9849188
# May2018 & Sept2018 - Background
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesBck<-read.table('tximport_design_background2018.txt',header=T)
dataPathBck<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/bck'
setwd(dataPathBck)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/bck", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
samplesOrder = c(4,5,6,13,14,15,1,2,3,10,11,12,7,8,9,16,17,18)
raw_counts <- raw_counts[,samplesOrder]
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts, colData = samplesBck,design = ~originSite_finalSite_experiment + dataset)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
## [4,] "dataset_sept2018_vs_may2018"
# General
vsd = vst(dds,blind=T)
pcaData = plotPCA(vsd, intgroup=c("originSite_finalSite_experiment","dataset"),returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment,shape=dataset)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
scale_shape_manual(values = c("circle", "triangle")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 & sept2018 dataset - Background") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Common gm
res_pv_gm_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2<-trimFunction(res_pv_gm_lfc2,0.05,1)
res_sp_gm_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_gm_lfc2<-trimFunction(res_sp_gm_lfc2,0.05,1)
common_gm_lfc2 <- merge(res_pv_gm_lfc2,res_sp_gm_lfc2,by="genes")
common_gm_annot_lfc2 = merge(common_gm_lfc2,A_pfam,by="genes")
common_gm_all_lfc2 <- applyAnnot(common_gm_lfc2,common_gm_annot_lfc2)
newColNames = samplesBck$originSite_finalSite_experiment
vsd_common_gm = heatmapFunction(newColNames,common_gm_lfc2,common_gm_all_lfc2,vsd,"May2018 & Sept2018 common GM genes")

# Common pv
res_gm_pv_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2<-trimFunction(res_gm_pv_lfc2,0.05,1)
res_sp_pv_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_pv_lfc2<-trimFunction(res_sp_pv_lfc2,0.05,1)
common_pv_lfc2 <- merge(res_gm_pv_lfc2,res_sp_pv_lfc2,by="genes")
common_pv_annot_lfc2 = merge(common_pv_lfc2,A_pfam,by="genes")
common_pv_all_lfc2 <- applyAnnot(common_pv_lfc2,common_pv_annot_lfc2)
newColNames = samplesBck$originSite_finalSite_experiment
vsd_common_pv_ = heatmapFunction(newColNames,common_pv_lfc2,common_pv_all_lfc2,vsd,"May2018 & Sept2018 common PV genes")

# Common sp
res_gm_sp_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_sp_lfc2<-trimFunction(res_gm_sp_lfc2,0.05,1)
res_pv_sp_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_sp_lfc2<-trimFunction(res_pv_sp_lfc2,0.05,1)
common_sp_lfc2 <- merge(res_gm_sp_lfc2,res_pv_sp_lfc2,by="genes")
common_sp_annot_lfc2 = merge(common_sp_lfc2,A_pfam,by="genes")
common_sp_all_lfc2 <- applyAnnot(common_sp_lfc2,common_sp_annot_lfc2)
newColNames = samplesBck$originSite_finalSite_experiment
vsd_common_sp = heatmapFunction(newColNames,common_sp_lfc2,common_sp_all_lfc2,vsd,"May2018 & Sept2018 common SP genes")

# Inferences statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesBck,dist_tab_assay ~ originSite_finalSite_experiment + dataset, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment + dataset, data = samplesBck, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 19376 0.19413 1.8294 0.001 ***
## dataset 1 6294 0.06305 1.1884 0.156
## Residual 14 74142 0.74282
## Total 17 99812 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 pv_pv_bck vs gm_gm_bck 1 10638.019 1.956959 0.1636669 0.010 0.030 .
## 2 pv_pv_bck vs sp_sp_bck 1 9361.071 1.896908 0.1594455 0.005 0.015 .
## 3 gm_gm_bck vs sp_sp_bck 1 9064.984 1.585828 0.1368765 0.004 0.012 .
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$dataset)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 may2018 vs sept2018 1 6293.505 1.076753 0.06305371 0.288 0.288
mod <- betadisper(dist_tab_assay,samplesBck$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv_pv_bck-gm_gm_bck -5.294183 -34.62293 24.03456 0.8867651
## sp_sp_bck-gm_gm_bck -1.712932 -31.04168 27.61581 0.9874031
## sp_sp_bck-pv_pv_bck 3.581252 -25.74749 32.91000 0.9462536
mod <- betadisper(dist_tab_assay,samplesBck$dataset)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## sept2018-may2018 3.760636 -10.99637 18.51764 0.596478